Data IO/preprocessing

spot_eq = df %>%
  filter(spots == '15_A') %>%
  select(-`...1`)
keep_types = unique(spot_eq$ClusterName)
# keep_types = keep_types[keep_types != "granulocytes"]
pat = create_ppp(spot_eq$X,spot_eq$Y,spot_eq$ClusterName,
                    keep_types = keep_types)
## Registered S3 method overwritten by 'spatstat.geom':
##   method     from
##   print.boxx cli

CytoMAP functionality to replicate:

  1. cell type definition (passing on this for now, lots of examples)
  2. Neighborhood definition (raster-scan vs graph based)
  3. Neighborhood clustering (many ways of calculating neighborhood similarities)
  1. cell type correlations
  2. distance to border or other landmark (may pass on this as well)
  3. pseudospace
  4. region visualization and statistics

Write to file in programita format

colnames(spot_eq)
##  [1] "CellID"                           "patients"                        
##  [3] "spots"                            "groups"                          
##  [5] "ClusterName"                      "size"                            
##  [7] "CD44 - stroma"                    "FOXP3 - regulatory T cells"      
##  [9] "CD8 - cytotoxic T cells"          "p53 - tumor suppressor"          
## [11] "GATA3 - Th2 helper T cells"       "CD45 - hematopoietic cells"      
## [13] "T-bet - Th1 cells"                "beta-catenin - Wnt signaling"    
## [15] "HLA-DR - MHC-II"                  "PD-L1 - checkpoint"              
## [17] "Ki67 - proliferation"             "CD45RA - naive T cells"          
## [19] "CD4 - T helper cells"             "CD21 - DCs"                      
## [21] "MUC-1 - epithelia"                "CD30 - costimulator"             
## [23] "CD2 - T cells"                    "Vimentin - cytoplasm"            
## [25] "CD20 - B cells"                   "LAG-3 - checkpoint"              
## [27] "Na-K-ATPase - membranes"          "CD5 - T cells"                   
## [29] "IDO-1 - metabolism"               "Cytokeratin - epithelia"         
## [31] "CD11b - macrophages"              "CD56 - NK cells"                 
## [33] "aSMA - smooth muscle"             "BCL-2 - apoptosis"               
## [35] "CD25 - IL-2 Ra"                   "CD11c - DCs"                     
## [37] "PD-1 - checkpoint"                "Granzyme B - cytotoxicity"       
## [39] "EGFR - signaling"                 "VISTA - costimulator"            
## [41] "CD15 - granulocytes"              "ICOS - costimulator"             
## [43] "Synaptophysin - neuroendocrine"   "GFAP - nerves"                   
## [45] "CD7 - T cells"                    "CD3 - T cells"                   
## [47] "Chromogranin A - neuroendocrine"  "CD163 - macrophages"             
## [49] "CD45RO - memory cells"            "CD68 - macrophages"              
## [51] "CD31 - vasculature"               "Podoplanin - lymphatics"         
## [53] "CD34 - vasculature"               "CD38 - multifunctional"          
## [55] "CD138 - plasma cells"             "HOECHST1"                        
## [57] "CDX2 - intestinal epithelia"      "Collagen IV - bas. memb."        
## [59] "CD194 - CCR4 chemokine R"         "MMP9 - matrix metalloproteinase" 
## [61] "CD71 - transferrin R"             "CD57 - NK cells"                 
## [63] "MMP12 - matrix metalloproteinase" "DRAQ5"                           
## [65] "CD4+ICOS+"                        "CD4+Ki67+"                       
## [67] "CD4+PD-1+"                        "CD68+CD163+ICOS+"                
## [69] "CD68+CD163+Ki67+"                 "CD68+CD163+PD-1+"                
## [71] "CD68+ICOS+"                       "CD68+Ki67+"                      
## [73] "CD68+PD-1+"                       "CD8+ICOS+"                       
## [75] "CD8+Ki67+"                        "CD8+PD-1+"                       
## [77] "Treg-ICOS+"                       "Treg-Ki67+"                      
## [79] "Treg-PD-1+"                       "X"                               
## [81] "Y"                                "Z"
spot = spot_eq
to_programita = function(spot) {
  feats = spot %>%
    dplyr::select(-c("CellID","patients","spots","groups","X","Y","Z")) %>%
    mutate_at(vars(-ClusterName), ~(scale(.) %>% as.vector)) %>%
    group_by(ClusterName) %>%
    summarise(across(where(is.numeric), ~mean(.x, na.rm = TRUE))) %>%
    dplyr::select(where(~!any(is.na(.)))) %>%
    t
  colnames(feats) = feats[1,]
  feats = feats[-1,]
  feats = apply(feats,2,as.numeric)
  dists=as.matrix(stats::dist(t(feats)),labels=T)
  colnames(dists) <- rownames(dists) <- colnames(feats)
}

Neighborhoods

Ideally, we’d like to input a neighborhood type (w/ parameters) and point pattern, and get a list of neighborhoods out.

Furthermore, we should be able to specify a range of graph types, BLUSPARAMS and neighborhood scalings, and be able to see their

sg = spatgraph(pat,type="gabriel",par=NULL)
plot(sg,pat)

nbhds = sg_to_nbhds(sg,pat)

Scaling neighborhood composition:

  1. comp (default) - divide by total number of cells, yields local composition of cell types
  2. global.comp - divide by maximum number of cells across all neighborhoods per cell type, normalizes each cell type to its own maximum
  3. binary - 1 if cell type is present, 0 otherwise
  4. standardize - normalizes so that cell types have mean 0 and standard deviation of 1
nbhds.obj = scale_nbhds(nbhds,scale = "standardize")
celltype_heatmap(nbhds.obj)

nbhds.obj = scale_nbhds(nbhds,scale = "standardize")
clust.out = cluster_nbhds(nbhds.obj=nbhds.obj)
## Warning in (function (to_check, X, clust_centers, clust_info, dtype, nn, :
## detected tied distances to neighbors, see ?'BiocNeighbors-ties'
centroids = clust.out$centroids
nbhd_celltype_heatmap(centroids)

nbhds.obj = scale_nbhds(nbhds,scale = "comp")
clust.out = cluster_nbhds(nbhds.obj=nbhds.obj)
## Warning in (function (to_check, X, clust_centers, clust_info, dtype, nn, :
## detected tied distances to neighbors, see ?'BiocNeighbors-ties'
centroids = clust.out$centroids
nbhd_celltype_heatmap(centroids)

nbhds.obj = scale_nbhds(nbhds,scale = "global.comp")
clust.out = cluster_nbhds(nbhds.obj=nbhds.obj)
## Warning in (function (to_check, X, clust_centers, clust_info, dtype, nn, :
## detected tied distances to neighbors, see ?'BiocNeighbors-ties'
centroids = clust.out$centroids
nbhd_celltype_heatmap(centroids)

Global.comp and standardize are very similar - makes sense, since they both standardize globally (using the max). comp is different, since it is taking into account only the local composition.

Geometric vs raster vs RNG

clust.out.geom = cluster_nbhds(pat=pat,ntype = "geometric",par=50L, blusparam = SomParam(centers=5L))
nbhd_celltype_heatmap(clust.out.geom$centroids)

clust.out.raster = cluster_nbhds(pat=pat,ntype = "raster",par=50L, blusparam = SomParam(centers=5L))
nbhd_celltype_heatmap(clust.out.raster$centroids)

clust.out.SIG = cluster_nbhds(pat=pat,ntype = "SIG", blusparam = SomParam(centers=5L))
nbhd_celltype_heatmap(clust.out.SIG$centroids)

linked <- linkClusters(
    list(
        c1=clust.out.SIG$clusters,
        c2=clust.out.geom$clusters
    )
)

meta <- igraph::cluster_walktrap(linked)
plot(linked, mark.groups=meta)

Combining to quickly evaluate clusterings

  1. Edge definitions
  2. Neighborhood scaling
  3. clustering parameters 3.1 type of clustering 3.2 parameters per type of clustering
# out = nbhdClusterSweep(pat, scales = c("comp"))
# saveRDS(out,paste0(RESULTS_PATH,'out.nbhdClusterSweep_15A.rds'))
out = readRDS(paste0(RESULTS_PATH,'out.nbhdClusterSweep_15A.rds'))
aris = out$aris

pheatmap(aris)

out$medoids
## [1] "k.5_cluster.fun.louvain_scale.comp_sg.geometricpar.50"
## [2] "k.5_cluster.fun.infomap_scale.comp_sg.geometricpar.75"
## [3] "centers.6_scale.comp_sg.geometricpar.20"
# s = out$medoids[1]
# 
# spl=stringr::str_split(s,"_")[[1]]
# 
# 
# 
# spl2=stringr::str_split(spl[1],stringr::coll("."))[[1]]
# 
# spl2
# 
# setNames(list(spl2[2]),c(spl2[1]))

So we have three “representative” clusterings. Let’s see if/how they affect downstream results.

c1 = cluster_nbhds(pat,ntype="SIG",scale="comp",blusparam = SomParam(centers=5))
c2 = cluster_nbhds(pat,ntype="gabriel",scale="comp",blusparam = SomParam(centers=5))
c3 = cluster_nbhds(pat,ntype="SIG",scale="comp",blusparam = SomParam(centers=9))
linked <- linkClusters(
    list(
        c1=c1$clusters,
        c2=c2$clusters,
        c3=c3$clusters
    )
)
meta <- igraph::cluster_walktrap(linked)
plot(linked, mark.groups=meta)

m = linkClustersMatrix(c1$clusters,c2$clusters)

m
##            1          2           3          4
## 1 0.02250804 0.76566757 0.004892368 0.01236749
## 2 0.07259380 0.01407035 0.625292740 0.03077975
## 3 0.04038330 0.01166667 0.061292472 0.72929293
## 4 0.60259740 0.01978022 0.060876623 0.03183792
nbhd_celltype_heatmap(c1$centroids)

nbhd_celltype_heatmap(c2$centroids)

nbhd_celltype_heatmap(c3$centroids)

region_spatial_plot(c1,pat)

c2 = cluster_nbhds(pat,ntype="geometric",par=50L,scale="comp",blusparam = SomParam(centers=5))

region_spatial_plot(c2,pat)

TODO: reorder levels of region so that graphs look similar, automatically or user-defined

These two clusterings look pretty similar to me. Some small discrepancies, but nothing very interesting here.

Pseudospace analysis

sg = spatgraph(pat,type="geometric",par=50)

nbhds = sg_to_nbhds(sg,pat)
nbhds.obj = scale_nbhds(nbhds,scale = "comp")

clust = cluster_nbhds(pat,nbhds.obj,blusparam = SomParam(centers=5))

region_spatial_plot(clust,pat)

cell_types = c('CD68+CD163+ macrophages',"CD8+ T cells","Tregs","tumor cells","vasculature")
pseudospace_plot(nbhds.obj, clusters = clust$clusters,
                 against = "CD8+ T cells",
                 cell_types = cell_types)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

celltype_pairsplot(nbhds.obj, clust$clusters, cell_types = cell_types)
## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero

## Warning in cor(x, y): the standard deviation is zero